/*
Clean EU-SILC cross-sectional personal register (R-file), 2004-2023.
Extracts demographic variables (age, sex, marital status) and identifies
households with children and child ages.

Uses:
$usern/Raw_Data/NO_CROSS/[year]/UDB_cNO[yy]R.csv  (years 2004-2023)

Creates:
$usern/Intermediate_Data/[year]_R_vars.dta  (years 2004-2023)
$usern/Intermediate_Data/0423_R_vars.dta
$usern/Intermediate_Data/0423_R_vars_inter.dta
$usern/Intermediate_Data/children_motherinfo_0423_R_vars.dta
$usern/Intermediate_Data/women_0423_R_vars.dta
$usern/Intermediate_Data/children_fatherinfo_0423_R_vars.dta
*/

*** Set directory
clear
global usern "$root/data/silc"

********************************************************************************
*** PERSONAL REGISTER (R-FILE)
********************************************************************************
// The R-file lists all current household members (of any age) and former
// household members. For each survey year this loop imports the raw CSV,
// gives the needed variables readable names and labels, drops records that
// lack an ID, birth year or sex, and saves one .dta file per year.

forvalues year = 2004/2023 {
	// Raw file names carry only the last two digits of the survey year
	local y = substr("`year'", -2, .)
	display "`y'"
	display "`year'"

	import delimited "$usern/Raw_Data/NO_CROSS/`year'/UDB_cNO`y'R.csv", varnames(1) clear

	// --- Identifiers, weight and demographics ---
	rename (rb010 rb030 rb080 rb050 rb090) (year persid birthyear persweight sex)
	label var year "Survey year"
	label var persid "hh id + pers number (2 digits)"
	label var birthyear "Year of Birth"
	label var persweight "Personal cross-sectional weight"
	label var sex "Sex"

	// rb082 (age at the time of interview) is deliberately not used;
	// age is derived later as year - birthyear.

	// sex == 2 is treated as female; records with missing sex are dropped below
	gen female = sex == 2
	label var female "Indicator for female"

	// --- Within-household links to parents and partner ---
	rename (rb220 rb230 rb240) (father_id mother_id spouse_id)
	label var father_id "Father ID"
	label var mother_id "Mother ID"
	label var spouse_id "Spouse/Partner ID"

	// --- Variables on time spent by children ---
	rename (rl010 rl020 rl030 rl040 rl050 rl060) ///
		(preschool compulsoryschool childcare_services childcare_daycare ///
		childcare_prof childcare_family)
	label var preschool "Education at pre-school (hours per week)"
	label var compulsoryschool "Education at compulsory school (hours per week)"
	label var childcare_services "Childcare at centre-based services (outside school hours, before/after)"
	label var childcare_daycare "Childcare at a day-care centre"
	label var childcare_prof "Childcare by professional childminder (home-based)"
	label var childcare_family "Unpaid childcare by relatives/friends/neighbors"

	keep year persid persweight birthyear sex father_id mother_id ///
	spouse_id preschool compulsoryschool childcare_services childcare_daycare ///
	childcare_prof childcare_family female

	// Records without an ID, birth year or sex cannot be used downstream
	drop if persid==. | birthyear==. | sex==.

	save "$usern/Intermediate_Data/`year'_R_vars.dta", replace
}

*** Append years
// Stack the yearly files into one pooled file: start from 2004 and append
// 2005-2023 in order. Note that the file saved here is overwritten by the
// final save at the end of this script.
use "$usern/Intermediate_Data/2004_R_vars.dta", clear

forvalues yr = 2005/2023 {
	append using "$usern/Intermediate_Data/`yr'_R_vars.dta"
}

save "$usern/Intermediate_Data/0423_R_vars.dta", replace
	
*** Generate HHID
// The household ID is the personal ID written out as text with its last two
// digits (the person number) removed.
tempvar pid_str
gen `pid_str' = string(persid, "%12.0f")
gen hhid = substr(`pid_str', 1, strlen(`pid_str') - 2)
drop `pid_str'
label var hhid "Household ID (persid without last 2 digits)"

// Household size and a running person number within each year-household,
// with persons ordered by persid
sort year hhid persid
by year hhid: gen perscount = _N
label var perscount "Number of persons in hh"
by year hhid: gen persn = _n

order year hhid persid persn perscount persweight birthyear sex female ///
 father_id mother_id spouse_id ///
 preschool compulsoryschool ///
 childcare_services childcare_daycare childcare_prof childcare_family
sort year persid
save "$usern/Intermediate_Data/0423_R_vars_inter.dta", replace

*** Generate vars
use "$usern/Intermediate_Data/0423_R_vars_inter.dta", clear

// Age is approximated as survey year minus year of birth
gen age = year - birthyear
label var age "Age"
drop if missing(age) // 0 obs

// A person counts as a child when at least one parent ID is recorded,
// regardless of the person's own age
gen is_child = !(missing(father_id) & missing(mother_id))
label var is_child "Indicator for child"

*** Collapse to parents dataset only and merge in childrens' info 
// Generate child dataset for mothers:
// one row per (year, mother_id) with her children's ages and childcare
// variables spread wide, ordered from youngest (suffix 1) to oldest.
preserve
keep if is_child == 1 & missing(spouse_id) // keep children (who do not have a spouse in the hh)
drop if missing(mother_id)
rename age child_age

// Number the children within each MOTHER, not within the household: the
// reshape below is keyed on (year, mother_id), so slot 1 must always be this
// mother's youngest child even when the household contains several mothers.
// persid breaks ties between children of the same age so the ordering is
// reproducible across runs.
bysort year mother_id (child_age persid): gen child_index = _n 
label var child_index "Child index within mother (1=Youngest)"

drop is_child persn persid sex female birthyear spouse_id father_id

reshape wide preschool compulsoryschool childcare_services childcare_daycare childcare_prof childcare_family child_age, i(year mother_id) j(child_index)

// Count number of children 
gen num_kids = 0
label var num_kids "Number of kids <=18years"

gen parent=0 
label var parent "Has a child of any age"

// Loop over whatever child_age# slots the reshape produced, rather than a
// hard-coded list, so the code neither fails when fewer slots exist nor
// ignores slots beyond a fixed maximum.
foreach var of varlist child_age* {
	replace num_kids = num_kids + 1 if !missing(`var') & `var'<=18
	replace parent=1 if !missing(`var')
}

save "$usern/Intermediate_Data/children_motherinfo_0423_R_vars.dta", replace
restore 

// Attach child information to women, matching on mother_id
preserve 
keep if female==1 & is_child == 0 // women who have no parent recorded in the hh
drop is_child preschool compulsoryschool childcare_services ///
	childcare_daycare childcare_prof childcare_family

// Everyone kept here has missing parent IDs (is_child == 0), so mother_id can
// be reused as the merge key: a woman's own persid is the mother_id carried
// by her children.
replace mother_id=persid

// Keep all women plus matched child info; child records whose mother is not
// in the women sample (using-only rows) are discarded.
merge 1:1 year hhid mother_id using "$usern/Intermediate_Data/children_motherinfo_0423_R_vars.dta", ///
	keepusing(num_kids parent child_age1 preschool1 compulsoryschool1 childcare_services1 childcare_daycare1 childcare_prof1 childcare_family1) ///
	keep(master match) nogenerate

save "$usern/Intermediate_Data/women_0423_R_vars.dta", replace
restore 

// Generate child dataset for fathers:
// one row per (year, father_id) with his children's ages and childcare
// variables spread wide, ordered from youngest (suffix 1) to oldest.
preserve
keep if is_child == 1 & missing(spouse_id) // keep children (who do not have a spouse in the hh)
drop if missing(father_id)
rename age child_age

// Number the children within each FATHER, not within the household: the
// reshape below is keyed on (year, father_id), so slot 1 must always be this
// father's youngest child even when the household contains several fathers.
// persid breaks ties between children of the same age so the ordering is
// reproducible across runs.
bysort year father_id (child_age persid): gen child_index = _n 
label var child_index "Child index within father (1=Youngest)"

drop is_child persn persid sex female birthyear spouse_id mother_id

reshape wide preschool compulsoryschool childcare_services childcare_daycare childcare_prof childcare_family child_age, i(year father_id) j(child_index)

// Count number of children 
gen num_kids = 0
label var num_kids "Number of kids <=18years"

gen parent=0 
label var parent "Has a child of any age"

// Loop over whatever child_age# slots the reshape produced, rather than a
// hard-coded list, so the code neither fails when fewer slots exist nor
// ignores slots beyond a fixed maximum.
foreach var of varlist child_age* {
	replace num_kids = num_kids + 1 if !missing(`var') & `var'<=18
	replace parent=1 if !missing(`var')
}

save "$usern/Intermediate_Data/children_fatherinfo_0423_R_vars.dta", replace
restore 

// merge info for fathers through father_id
keep if is_child == 0 & female==0 // Keep only men who have no parent recorded in the hh
drop is_child preschool compulsoryschool childcare_services childcare_daycare childcare_prof childcare_family

// Everyone kept here has missing parent IDs (is_child == 0), so father_id can
// be reused as the merge key: a man's own persid is the father_id carried by
// his children.
replace father_id=persid

merge 1:1 year hhid father_id using "$usern/Intermediate_Data/children_fatherinfo_0423_R_vars.dta", ///
keepusing(num_kids parent child_age1 preschool1 compulsoryschool1 childcare_services1 childcare_daycare1 childcare_prof1 childcare_family1)
drop if _merge==2 // child records whose father is not in the men sample
drop _merge

// Append women
append using "$usern/Intermediate_Data/women_0423_R_vars.dta"
drop father_id mother_id 

// Adults without matched child records are childless in this data
replace num_kids=0 if num_kids==.
replace parent=0 if parent==.

// num_kids already counts only children aged <= 18, so it alone determines
// haschild. An extra condition on child_age1 would wrongly give 0 whenever
// child_age1 is missing although num_kids > 0.
gen haschild = num_kids > 0
label var haschild "Has a child <= 18 years"


// Create indicator variables for child_age1 equal to each age from 0 to 18
forval i = 0/18 {
    gen k_age_`i' = (child_age1 == `i')
    label var k_age_`i' "`i'"
}
save "$usern/Intermediate_Data/0423_R_vars.dta", replace 



